From e95b0bc6d7ccfb85cd71ea2b5e637e3d0144915b Mon Sep 17 00:00:00 2001 From: "kaf24@freefall.cl.cam.ac.uk" Date: Wed, 27 Oct 2004 16:20:31 +0000 Subject: [PATCH] bitkeeper revision 1.1159.1.290 (417fcacfkfQjkHbB1_isdievDAarQA) Further fixes to the TLB-flush logic. --- xen/arch/x86/domain.c | 1 - xen/arch/x86/flushtlb.c | 45 ++++++++++++++++++++------------- xen/arch/x86/memory.c | 10 ++------ xen/arch/x86/pdb-stub.c | 4 +-- xen/arch/x86/smp.c | 7 ----- xen/arch/x86/traps.c | 3 +-- xen/common/page_alloc.c | 2 +- xen/include/asm-x86/flushtlb.h | 41 ++++++++++++++++++++++-------- xen/include/asm-x86/page.h | 8 ------ xen/include/asm-x86/processor.h | 2 +- 10 files changed, 63 insertions(+), 60 deletions(-) diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index af4bffb106..98e5438497 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -387,7 +387,6 @@ void switch_to(struct domain *prev_p, struct domain *next_p) /* Switch page tables. */ write_ptbase(&next_p->mm); - tlb_clocktick(); } if ( unlikely(prev_p->io_bitmap != NULL) || diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c index a7149f4a96..023fcd354c 100644 --- a/xen/arch/x86/flushtlb.c +++ b/xen/arch/x86/flushtlb.c @@ -12,38 +12,47 @@ #include #include -unsigned long tlbflush_epoch_changing; u32 tlbflush_clock; u32 tlbflush_time[NR_CPUS]; -void tlb_clocktick(void) +void write_cr3(unsigned long cr3) { - u32 y, ny; + u32 t, t1, t2; unsigned long flags; local_irq_save(flags); - /* Tick the clock. 'y' contains the current time after the tick. */ - ny = tlbflush_clock; + /* + * Tick the clock, which is incremented by two each time. The L.S.B. is + * used to decide who will control the epoch change, when one is required. + */ + t = tlbflush_clock; do { -#ifdef CONFIG_SMP - if ( unlikely(((y = ny+1) & TLBCLOCK_EPOCH_MASK) == 0) ) + t1 = t; /* t1: Time before this clock tick. */ + t2 = t + 2; /* t2: Time after this clock tick. */ + if ( unlikely(t2 & 1) ) { - /* Epoch is changing: the first to detect this is the leader. */ - if ( unlikely(!test_and_set_bit(0, &tlbflush_epoch_changing)) ) - raise_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ); - /* The clock doesn't tick again until end of the epoch change. */ - y--; - break; + /* Epoch change: someone else is leader. */ + t2 = t; /* no tick */ + goto skip_clocktick; + } + else if ( unlikely((t2 & TLBCLOCK_EPOCH_MASK) == 0) ) + { + /* Epoch change: we may become leader. */ + t2--; /* half tick */ } -#else - y = ny+1; -#endif } - while ( unlikely((ny = cmpxchg(&tlbflush_clock, y-1, y)) != y-1) ); + while ( unlikely((t = cmpxchg(&tlbflush_clock, t1, t2)) != t1) ); + + /* Epoch change: we are the leader. */ + if ( unlikely(t2 & 1) ) + raise_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ); + + skip_clocktick: + __asm__ __volatile__ ( "mov"__OS" %0, %%cr3" : : "r" (cr3) : "memory" ); /* Update this CPU's timestamp to new time. */ - tlbflush_time[smp_processor_id()] = y; + tlbflush_time[smp_processor_id()] = t2; local_irq_restore(flags); } diff --git a/xen/arch/x86/memory.c b/xen/arch/x86/memory.c index ad1d6f0ef5..1d390933a1 100644 --- a/xen/arch/x86/memory.c +++ b/xen/arch/x86/memory.c @@ -781,7 +781,7 @@ void put_page_type(struct pfn_info *page) if ( unlikely((nx & PGT_count_mask) == 0) ) { /* Record TLB information for flush later. Races are harmless. */ - page->tlbflush_timestamp = tlbflush_clock; + page->tlbflush_timestamp = tlbflush_current_time(); if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) && likely(nx & PGT_validated) ) @@ -989,13 +989,7 @@ static int do_extended_command(unsigned long ptr, unsigned long val) write_ptbase(&d->mm); - put_page_and_type(&frame_table[old_base_pfn]); - - /* - * Note that we tick the clock /after/ dropping the old base's - * reference count. If the page tables got freed then this will - * avoid unnecessary TLB flushes when the pages are reused. */ - tlb_clocktick(); + put_page_and_type(&frame_table[old_base_pfn]); } else { diff --git a/xen/arch/x86/pdb-stub.c b/xen/arch/x86/pdb-stub.c index c4e73e2d99..c8d5421feb 100644 --- a/xen/arch/x86/pdb-stub.c +++ b/xen/arch/x86/pdb-stub.c @@ -1089,9 +1089,7 @@ int pdb_handle_exception(int exceptionVector, int signal = 0; struct pdb_breakpoint* bkpt; int watchdog_save; - unsigned long cr3; - - __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : ); + unsigned long cr3 = read_cr3(); /* If the exception is an int3 from user space then pdb is only interested if it re-wrote an instruction set the breakpoint. diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c index 9efe37f231..a7172908bb 100644 --- a/xen/arch/x86/smp.c +++ b/xen/arch/x86/smp.c @@ -286,13 +286,6 @@ void new_tlbflush_clock_period(void) /* No need for atomicity: we are the only possible updater. */ tlbflush_clock++; - - /* Finally, signal the end of the epoch-change protocol. */ - wmb(); - tlbflush_epoch_changing = 0; - - /* In case we got to the end of the next epoch already. */ - tlb_clocktick(); } static void flush_tlb_all_pge_ipi(void* info) diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 7960848b53..e46bc94106 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -475,8 +475,7 @@ asmlinkage void do_general_protection(struct pt_regs *regs, long error_code) #ifdef XEN_DEBUGGER if ( pdb_initialized && (pdb_ctx.system_call != 0) ) { - unsigned long cr3; - __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : ); + unsigned long cr3 = read_cr3(); if ( cr3 == pdb_ctx.ptbr ) pdb_linux_syscall_enter_bkpt(regs, error_code, ti); } diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index 0abd061bd4..72e13ec9d3 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -451,7 +451,7 @@ void free_domheap_pages(struct pfn_info *pg, int order) for ( i = 0; i < (1 << order); i++ ) { ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0); - pg[i].tlbflush_timestamp = tlbflush_clock; + pg[i].tlbflush_timestamp = tlbflush_current_time(); pg[i].u.free.cpu_mask = 1 << d->processor; list_del(&pg[i].list); diff --git a/xen/include/asm-x86/flushtlb.h b/xen/include/asm-x86/flushtlb.h index 8df9849a49..8b068e3191 100644 --- a/xen/include/asm-x86/flushtlb.h +++ b/xen/include/asm-x86/flushtlb.h @@ -15,13 +15,9 @@ /* * Every time the TLB clock passes an "epoch", every CPU's TLB is flushed. - * Therefore, if the current TLB time and a previously-read timestamp differ - * in their significant bits (i.e., ~TLBCLOCK_EPOCH_MASK), then the TLB clock - * has wrapped at least once and every CPU's TLB is guaranteed to have been - * flushed meanwhile. * This allows us to deal gracefully with a bounded (a.k.a. wrapping) clock. */ -#define TLBCLOCK_EPOCH_MASK ((1U<<16)-1) +#define TLBCLOCK_EPOCH_MASK ((1U<<20)-1) /* * 'cpu_stamp' is the current timestamp for the CPU we are testing. @@ -32,22 +28,39 @@ static inline int NEED_FLUSH(u32 cpu_stamp, u32 lastuse_stamp) { /* * Worst case in which a flush really is required: - * CPU has not flushed since end of last epoch (cpu_stamp = 0x0000ffff). - * Clock has run to end of current epoch (clock = 0x0001ffff). - * Therefore maximum valid difference is 0x10000 (EPOCH_MASK + 1). + * 1. CPU has not flushed since end of last epoch. + * 2. Clock has run to end of current epoch. + * THEREFORE: Maximum valid difference is (EPOCH_MASK + 1). * N.B. The clock cannot run further until the CPU has flushed once more - * and updated its stamp to 0x1ffff, so this is as 'far out' as it can get. + * and updated to current time, so this is as 'far out' as it can get. */ return ((lastuse_stamp - cpu_stamp) <= (TLBCLOCK_EPOCH_MASK + 1)); } -extern unsigned long tlbflush_epoch_changing; +/* + * The least significant bit of the clock indicates whether an epoch-change + * is in progress. All other bits form the counter that is incremented on + * each clock tick. + */ extern u32 tlbflush_clock; extern u32 tlbflush_time[NR_CPUS]; -extern void tlb_clocktick(void); +#define tlbflush_current_time() tlbflush_clock + extern void new_tlbflush_clock_period(void); +/* Read pagetable base. */ +static inline unsigned long read_cr3(void) +{ + unsigned long cr3; + __asm__ __volatile__ ( + "mov"__OS" %%cr3, %0" : "=r" (cr3) : ); + return cr3; +} + +/* Write pagetable base and implicitly tick the tlbflush clock. */ +extern void write_cr3(unsigned long cr3); + /* * TLB flushing: * @@ -59,6 +72,12 @@ extern void new_tlbflush_clock_period(void); * and page-granular flushes are available only on i486 and up. */ +#define __flush_tlb() \ + do { \ + unsigned long cr3 = read_cr3(); \ + write_cr3(cr3); \ + } while ( 0 ) + #ifndef CONFIG_SMP #define flush_tlb() __flush_tlb() diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h index d65295c802..bf4011028c 100644 --- a/xen/include/asm-x86/page.h +++ b/xen/include/asm-x86/page.h @@ -133,14 +133,6 @@ typedef struct { unsigned long pt_lo; } pagetable_t; extern l2_pgentry_t idle_pg_table[ENTRIES_PER_L2_PAGETABLE]; extern void paging_init(void); -#define __flush_tlb() \ - do { \ - __asm__ __volatile__ ( \ - "mov %%cr3, %%"__OP"ax; mov %%"__OP"ax, %%cr3" \ - : : : "memory", __OP"ax" ); \ - tlb_clocktick(); \ - } while ( 0 ) - /* Flush global pages as well. */ #define __pge_off() \ diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h index 770d7cf301..f6951c0bc1 100644 --- a/xen/include/asm-x86/processor.h +++ b/xen/include/asm-x86/processor.h @@ -404,7 +404,7 @@ static inline void write_ptbase(struct mm_struct *mm) else pa = pagetable_val(mm->pagetable); - __asm__ __volatile__ ( "mov"__OS" %0, %%cr3" : : "r" (pa) : "memory" ); + write_cr3(pa); } #define IDLE0_MM \ -- 2.30.2